# -*- coding: utf-8 -*-
# @Time : 2025/5/6 19:36
# @Author : 梁满仓
# @File : scrape_a_web_page
# @Project : study_python

import requests
import os
import chardet


def _filename_from_url(url, max_length=100, extension='.html'):
    """Derive a filesystem-safe file name of at most *max_length* characters from *url*."""
    # Strip only the scheme. Splitting on every '//' would throw away the host
    # whenever the path or query string itself contains '//'.
    stem = url.split('://', 1)[-1]
    # Path separators and the characters Windows forbids in file names all
    # become '_' so the derived name is usable on every platform.
    stem = stem.translate(str.maketrans(dict.fromkeys('\\/:*?"<>|', '_')))
    # Truncate the stem rather than the finished name so the extension survives.
    return stem[:max_length - len(extension)] + extension


def download_webpage_enhanced(url, filename=None, folder='downloaded_pages'):
    """Download a web page, detect its text encoding, and save it as UTF-8.

    Args:
        url: Address of the page to fetch.
        filename: Name of the output file inside *folder*. When empty or
            ``None``, a name is derived from the URL (scheme removed, unsafe
            characters replaced by ``_``, ``.html`` appended, at most 100
            characters in total).
        folder: Directory that receives the file; created if it is missing.

    Returns:
        The path of the saved file, or ``None`` if any step failed (the error
        is printed instead of being raised).
    """
    try:
        os.makedirs(folder, exist_ok=True)

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

        # Fetch the raw bytes; decoding is done below with the detected codec.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        raw = response.content

        # chardet reports None when it cannot guess (e.g. an empty body).
        encoding = chardet.detect(raw)['encoding'] or 'utf-8'

        try:
            content = raw.decode(encoding, errors='replace')
        except LookupError:
            # chardet can name a codec Python does not provide; fall back to
            # UTF-8 instead of aborting the whole download.
            content = raw.decode('utf-8', errors='replace')

        if not filename:
            filename = _filename_from_url(url)

        filepath = os.path.join(folder, filename)

        # newline='' writes line endings exactly as received; without it,
        # Windows text mode would turn the page's '\r\n' into '\r\r\n'.
        with open(filepath, 'w', encoding='utf-8', newline='') as f:
            f.write(content)

        print(f"网页已成功保存到: {filepath}")
        return filepath

    except Exception as e:
        # Deliberately broad: the function's contract is best-effort, reporting
        # the failure and returning None rather than raising to the caller.
        print(f"下载失败: {e}")
        return None


# Usage example: fetch a single page and store it under an explicit file name.
# NOTE(review): this call runs whenever the module is loaded, including on
# import; consider moving it under an `if __name__ == "__main__":` guard.
download_webpage_enhanced(
    url='https://www.xyeduku.com/70629/10433955.html',
    filename='10433955.html',
)
